library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(qiime2R)
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:qiime2R':
## 
##     mean_sd
library(ggplot2)
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(ggh4x)
library(knitr)
library(DT)

Needed File Paths

# Input locations, relative to the directory this document is knit from.
# Sample metadata table (tab-separated).
metadata_FP <- "../data/misc/s1_filt_comp_metadata.tsv"
# Distance matrices (tab-separated); `uw_` and `w_` mirror the file names.
uw_dist_fp  <- "../data/s1_filt_core/uw_dist_matrix.tsv"
w_dist_fp   <- "../data/s1_filt_core/w_dist_matrix.tsv"

Reading in Distance Matrices and Metadata

# Load the sample metadata and give the "#SampleID" column the plain name
# `sampleid` so it can be referenced without backticks. `any_of()` leaves the
# table untouched if that column is absent.
metadata <- read_tsv(metadata_FP) %>%
  rename(any_of(c(sampleid = "#SampleID")))
## Rows: 462 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): #SampleID, sample_type, facility, diet
## dbl (7): day_post_inf, mouse_id, high_fat, high_fiber, purified_diet, seq_de...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Load the `uw` distance matrix. Its first column has an empty header, which
# readr repairs to `...1` (see the "New names" message below); that column
# holds the row sample ids, so rename it accordingly.
uw_dist <- read_tsv(uw_dist_fp) %>%
  rename(any_of(c(row_sampleid = "...1")))
## New names:
## Rows: 459 Columns: 460
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): ...1 dbl (459): 2_0418_9740, 2_0418_9741, 2_0418_9742, 2_0418_9743,
## 2_0418_9744, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

# Load the `w` distance matrix and name its (header-less) first column
# `row_sampleid`, mirroring the handling of `uw_dist`.
# NOTE(review): `w_dist` is not used in the visible remainder of this
# document -- confirm whether it is needed.
w_dist <- read_tsv(w_dist_fp) %>%
  rename(any_of(c(row_sampleid = "...1")))
## New names:
## Rows: 459 Columns: 460
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): ...1 dbl (459): 2_0418_9740, 2_0418_9741, 2_0418_9742, 2_0418_9743,
## 2_0418_9744, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

Data Wrangling

## Slim metadata table: only the columns that get attached to each pairwise
## distance (sample id, sample type, mouse id and diet).
mini_meta <- select(metadata, sampleid, sample_type, mouse_id, diet)

## Reshape the wide distance matrix into long form -- one row per
## (row_sampleid, col_sampleid) pair with its distance in `dist` -- and attach
## the metadata of both samples:
##   * columns suffixed `.x` describe `row_sampleid`
##   * columns suffixed `.y` describe `col_sampleid`
## Both joins are inner joins, so samples missing from `mini_meta` are dropped.
## `pivot_longer()` / `inner_join()` replace the superseded `gather()` and base
## `merge()`; the result keeps the same column names but stays a tibble and is
## no longer re-sorted by the join key.
## NOTE: this overwrites the wide `uw_dist` with its long form, so the chunk
## cannot be re-run without re-reading the matrix.
uw_dist <- uw_dist %>%
  pivot_longer(
    cols = -row_sampleid,
    names_to = "col_sampleid",
    values_to = "dist"
  ) %>%
  inner_join(mini_meta, by = c("row_sampleid" = "sampleid")) %>%
  inner_join(mini_meta, by = c("col_sampleid" = "sampleid")) %>%
  mutate(
    # Composite keys identifying the pair by sample id, sample type and mouse.
    dist_key = paste(row_sampleid, col_sampleid, sep = "_"),
    samp_type_key = paste(sample_type.x, sample_type.y, sep = "_"),
    mouse_id_key = paste(mouse_id.x, mouse_id.y, sep = "_")
  )


## Stool samples paired by mouse id: both samples have sample_type 'colon' and
## come from the same mouse.
## A sample compared with itself also satisfies those conditions, so such
## self-comparisons are excluded here; otherwise they would show up in this
## table even though they are not pairs.
## NOTE(review): "stool" is selected via sample_type == 'colon' -- confirm that
## this is the intended label.
## NOTE(review): if the distance matrix is symmetric, every pair appears twice
## (A vs B and B vs A) -- confirm whether that double counting is acceptable.
uw_dist_mouse <- uw_dist %>%
  filter(
    sample_type.x == "colon",
    sample_type.y == "colon",
    mouse_id.x == mouse_id.y,
    row_sampleid != col_sampleid
  ) %>%
  mutate(class = "stool_same_mouse")

## Cecal vs stool, matched: the two samples differ in sample type but share a
## mouse id.
uw_dist_cecStool_match <- uw_dist %>%
  filter(sample_type.x != sample_type.y & mouse_id.x == mouse_id.y) %>%
  mutate(class = "cecal_stool_matched")

## Cecal vs stool, unmatched: the two samples differ in sample type and come
## from different mice.
uw_dist_cecStool_unmatch <- uw_dist %>%
  filter(sample_type.x != sample_type.y & mouse_id.x != mouse_id.y) %>%
  mutate(class = "cecal_stool_unmatched")

## Stack the three comparison classes into a single table, then discard any
## row in which a sample is compared with itself.
uw_dist_giant <- rbind(
  uw_dist_mouse,
  uw_dist_cecStool_match,
  uw_dist_cecStool_unmatch
) %>%
  filter(row_sampleid != col_sampleid)

Tables I Created

## Interactive views of the three comparison-class tables. The rendered output
## below reports that the data is too big for client-side DataTables.
uw_dist_mouse %>% DT::datatable()
uw_dist_cecStool_match %>% DT::datatable()
uw_dist_cecStool_unmatch %>% DT::datatable()
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

Boxplot of Pairwise Distances by Comparison Class

# One box per comparison class showing the distribution of pairwise distances;
# boxes are filled by class and drawn semi-transparent.
ggplot(data = uw_dist_giant, mapping = aes(x = class, y = dist)) +
  geom_boxplot(mapping = aes(fill = class, group = class), alpha = 0.5) +
  # Disabled: overlay of the individual points.
  # geom_jitter(alpha = 0.3, width = 0.1, height = 0) +
  theme_bw()

  # Disabled: one panel per diet of the row sample.
  # facet_wrap(~diet.x)